#pragma once
#include "cuda.h"
#include "cuda_runtime.h"
#include "cuda_fp16.h"
#include "tensor.h"

// Declaration of the causal-mask build launcher. Only the prototype is
// visible in this header; the definition (and any kernel it launches) lives
// in another translation unit.
//
// NOTE(review): because the template body is not defined here, callers
// presumably rely on explicit instantiations provided next to the
// definition -- confirm which element types T are instantiated.
//
// Parameters (semantics inferred from names only -- TODO confirm against the
// definition):
//   mask    presumably the output tensor that receives the mask values.
//   q_lens  presumably per-sequence query lengths.
//   k_lens  presumably per-sequence key lengths.
//
// The function returns nothing; any result is communicated through the
// tensors passed by pointer.
template <typename T>
void launchBuildCausalMask(TensorWrapper<T>* mask,
                           TensorWrapper<int>* q_lens,
                           TensorWrapper<int>* k_lens);